***********************************************************************
*This file cleans data based on institution characteristics from IPEDS*
***********************************************************************

/* Years that we do not use in the analysis are not appended, but I
create separate variables for each year so that the information can be
used when it is not available in the year we use. */

***** Years that we use in the analysis
*** Create one .dta file per year by running that year's import/labeling do-file
* The working directory is switched to the raw-data folder first
* (presumably the import scripts read their input by relative path -- confirm).
cd "$raw_data_education/institutionDetails"

* 1989-1991: the import scripts are named ic<year>_hdr.do
foreach i in 1989 1990 1991 {
	* Start from an empty dataset, so that a failed import can never let
	* stale data left in memory be saved under this year's file name.
	clear
	* capture is kept on purpose: the import script may stop with an error
	* after the data are already loaded. The return code is reported instead
	* of being swallowed silently.
	cap do "$do_clean_education/IPEDS - do/do institutionDetails/ic`i'_hdr.do"
	local rc = _rc
	if `rc' != 0 {
		display as error "warning: import script for `i' ended with return code `rc'"
	}
	* keep stops with an error if the import did not produce these variables
	keep unitid instnm sector control iclevel
	gen year = `i'
	save "$clean_data_education/institutionDetails_`i'.dta", replace
	clear
}
**************************************
* 1992-1995: the import scripts are named ic<year>_a.do
foreach i in 1992 1993 1994 1995 {
	* Start from an empty dataset, so that a failed import can never let
	* stale data left in memory be saved under this year's file name.
	clear
	* capture is kept on purpose: the import script may stop with an error
	* after the data are already loaded. The return code is reported instead
	* of being swallowed silently.
	cap do "$do_clean_education/IPEDS - do/do institutionDetails/ic`i'_a.do"
	local rc = _rc
	if `rc' != 0 {
		display as error "warning: import script for `i' ended with return code `rc'"
	}
	* keep stops with an error if the import did not produce these variables
	keep unitid instnm sector control iclevel
	gen year = `i'
	save "$clean_data_education/institutionDetails_`i'.dta", replace
	clear
}

**************************************
* 1999: the import script is named ic<year>hd.do
foreach i in 1999 {
	* Start from an empty dataset, so that a failed import can never let
	* stale data left in memory be saved under this year's file name.
	clear
	* capture is kept on purpose: the import script may stop with an error
	* after the data are already loaded. The return code is reported instead
	* of being swallowed silently.
	cap do "$do_clean_education/IPEDS - do/do institutionDetails/ic`i'hd.do"
	local rc = _rc
	if `rc' != 0 {
		display as error "warning: import script for `i' ended with return code `rc'"
	}
	* keep stops with an error if the import did not produce these variables
	keep unitid instnm sector control iclevel
	gen year = `i'
	save "$clean_data_education/institutionDetails_`i'.dta", replace
	clear
}

**************************************
* 2000-2001, 2006-2008 and 2013-2015: the import scripts are named fa<year>hd.do.
* They are run without capture, so any error in them stops this file.
local importDir "$do_clean_education/IPEDS - do/do institutionDetails"
foreach yr of numlist 2000/2001 2006/2008 2013/2015 {
	do "`importDir'/fa`yr'hd.do"
	* Reduce to the identifier and the institution characteristics, then tag the year
	keep unitid instnm sector control iclevel
	generate year = `yr'
	save "$clean_data_education/institutionDetails_`yr'.dta", replace
	clear
}

**************************************

* Stack the yearly files into one dataset, starting from 1989.
* Each yearly file is deleted from disk right after it has been read.
local yearly "$clean_data_education/institutionDetails_"
use "`yearly'1989.dta", clear
foreach yr of numlist 1990/1995 1999/2001 2006/2008 2013/2015 {
	append using "`yearly'`yr'.dta"
	erase "`yearly'`yr'.dta"
}
erase "`yearly'1989.dta"


* Typos: hand corrections for specific institution-years.
* sector and control always receive the same corrected value, so both are
* patched in one pass over the two variables.
foreach v of varlist sector control {
	replace `v' = 3 if unitid == 110185 & year > 2013
	replace `v' = 2 if unitid == 120698 & year == 2008
	replace `v' = 2 if unitid == 223214 & year == 1999
	replace `v' = 3 if unitid == 385619 & year == 2001
}

* final cleanings
* Runs the external institution-id cleaning script on the appended data.
* run executes it without echoing its commands; what the script changes
* is not visible from this file.
run "$do_clean_education/clean institutions - id.do"

* Save the year-level data as it stands after the corrections and cleaning above
save "$clean_data_education/institutionDetailsOriginal.dta", replace


*** Build three year averages
* year group
* Every year is mapped to the label of its window; a year that matches no
* window keeps the initial value 0.
gen yearGroup = 0
	replace yearGroup = 1990 if year <= 1991 
	* 1995 is imported and appended earlier in this file, so it is included in
	* this window. Before, it matched no window, stayed at 0 and produced a
	* stray yearGroup == 0 record per institution after the collapse.
	* NOTE(review): confirm that 1995 belongs in the 1994 window rather than
	* being dropped from the data.
	replace yearGroup = 1994 if year >= 1992 & year <= 1995
	replace yearGroup = 2000 if year >= 1999 & year <= 2001
	replace yearGroup = 2007 if year >= 2006 & year <= 2008
	replace yearGroup = 2014 if year >= 2013

**********************************************************
* Collapse to one record per institution and year group, keeping variable labels.
* collapse overwrites variable labels, so they are stored in locals first
* and restored afterwards. A variable without a label gets its own name.
foreach v of varlist * {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

* (last) takes the final row of each group in the current row order.
* Order the rows by year inside each group explicitly, so the result does
* not depend on whatever order the earlier steps left the data in.
* stable keeps the existing relative order of rows that tie on all three keys.
sort unitid yearGroup year, stable
collapse (last) instnm sector control iclevel, by(unitid yearGroup)

* Restore the stored labels; compound quotes keep labels that contain a
* double quote intact.
foreach v of varlist * {
	label variable `v' `"`l`v''"'
}
**********************************************************

save "$clean_data_education/institutionDetails.dta", replace

